library(cytomapper)
library(dplyr)
library(ggplot2)
library(simpleSeg)
library(FuseSOM)
library(ggpubr)
library(scater)
library(spicyR)
#library(ClassifyR)
library(ClassifyR, lib.loc = "~/localPackages/")
library(scFeatures)
library(lisaClust)

theme_set(theme_classic())
#withr::with_libpaths(new = "~/localPackages/", devtools::install_github("SydneyBioX/ClassifyR"))

Global paramaters

nCores <- 20
BPPARAM <- simpleSeg:::generateBPParam(nCores)

Read in images

Our images are stored in the images folder within the Data folder. Here we use readImages() from the EBImage package to read these into R. If memory is a restricting factor, and the files are in a slightly different format, you could use loadImages() from the cytomapper package to load all of the tiff images into a CytoImageList object, which can store the images as h5 on-disk.

pathToImages <- "Data/images"

# Get directories of images
imageDirs <- dir(pathToImages, full.names = TRUE)
names(imageDirs) <- dir(pathToImages, full.names = FALSE)

# Get files in each directory
files <- sapply(imageDirs, list.files, pattern = "tif", full.names = TRUE, simplify = FALSE)

# Read files with readImage from EBImage
images <- lapply(files, EBImage::readImage, as.is = TRUE)

We will make use of the on_disk option to convert our images to a CytoImageList with the images not held in memory.

# Store images in a CytoImageList with images on_disk as h5 files to save memory. 
dir.create("Data/h5Files")
images <- cytomapper::CytoImageList(images, 
                                    on_disk = TRUE, 
                                    h5FilesPath = "Data/h5Files", 
                                    BPPARAM = BPPARAM)
gc()
##            used  (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells 17218109 919.6   24979905  1334.1   24979905  1334.1
## Vcells 81827702 624.3 3057820234 23329.4 3822275292 29161.7
# images <- cytomapper::loadImages("Data/h5Files/", pattern = "h5", h5FilesPath = "Data/h5Files/",on_disk = TRUE)
# mcols(images) <- S4Vectors::DataFrame(imageID = names(images))

Load the clinical data

Read the clinical data

clinical <- read.csv("Data/1-s2.0-S0092867421014860-mmc1.csv")
clinical <- clinical |>
  mutate(imageID = paste0("Point", PointNumber, "_pt", Patient_ID, "_", TMAD_Patient))
clinical$imageID[grep("normal", clinical$Tissue_Type)] <- paste0(clinical$imageID[grep("normal", clinical$Tissue_Type)], "_Normal")

clinicalVariables <- c("imageID", "Patient_ID","Status", "Age", "SUBTYPE", "PAM50", "Treatment", "DCIS_grade", "Necrosis")
rownames(clinical) <- clinical$imageID

Put the clinical data into the colData of SingleCellExperiment

mcols(images) <- clinical[names(images), clinicalVariables]

Segment the images

Run simpleSeg

masks <- simpleSeg(images,
                   nucleus = c("PCA", "HH3"),
                   cellBody = "dilate",
                   transforms = "sqrt",
                   sizeSelection = 40,
                   discSize = 2,
                   cores = nCores)

Visualise separation

EBImage::display(colorLabels(masks[[1]]))

Visualise outlines

cytomapper::plotPixels(image = images[1:3], 
                       mask = masks[1:3], 
                       img_id = "imageID", 
                       colour_by = c("PanKRT", "GLUT1", "HH3", "CD3", "CD20"), 
                       display = "single",
                       colour = list(HH3 = c("black","blue"), 
                                     CD3 = c("black","purple"),
                                     CD20 = c("black","green"),
                                     GLUT1 = c("black", "red"),
                                     PanKRT = c("black", "yellow")),
                       bcg = list(HH3 = c(0, 1, 1.5), 
                                     CD3 = c(0, 1, 1.5),
                                     CD20 = c(0, 1, 1.5),
                                     GLUT1 = c(0, 1, 1.5),
                                     PanKRT = c(0, 1, 1.5)),
                       legend = NULL)

Summarise cell features.

cells <- cytomapper::measureObjects(masks, 
                                    images, 
                                    img_id = "imageID", 
                                    BPPARAM = BPPARAM)

Normalize data

  df <- as.data.frame(cbind(colData(cells), t(assay(cells, "counts"))))

  ggplot(df, aes(x = PanKRT, colour = imageID)) + 
    geom_density() + 
    theme(legend.position = "none")

cells <- normalizeCells(cells, 
                        transformation = "sqrt", 
                        method = c("quant99", "minMax"), 
                        assayIn = "counts", 
                        cores = nCores)


df <- as.data.frame(cbind(colData(cells), t(assay(cells, "norm"))))

ggplot(df, aes(x = PanKRT, colour = imageID)) + 
  geom_density() + 
  theme(legend.position = "none")

Cluster cells using FuseSOM package

Perform the clustering

#The markers used in the original publication to gate cell types
useMarkers <- c("PanKRT", "ECAD", "CK7", "VIM", "FAP", "CD31", "CK5", "SMA", 
                "CD45", "CD4", "CD3", "CD8", "CD20", "CD68", "CD14", "CD11c", 
                "HLADRDPDQ", "MPO", "Tryptase")

set.seed(51773)
cells <- runFuseSOM(cells, 
                    markers = useMarkers, 
                    assay = 'norm', 
                    numClusters = 20)

Check how many clusters should be used.

# As I've already run runFuseSOM I don't need to run generateSOM()
cells <- estimateNumCluster(cells, kseq = 2:30)
## 

========
=================
==========================
==================================
==========================================
===================================================
============================================================
====================================================================
============================================================================
=====================================================================================
optiPlot(cells, method = "gap")

scater::plotGroupedHeatmap(cells, 
                           features = useMarkers, 
                           group = "clusters", 
                           exprs_values = "norm",
                           center = TRUE, 
                           scale = TRUE, 
                           zlim = c(-3,3),
                           cluster_rows = FALSE)

colData(cells)$clusters |>
  table() |>
  sort()
## 
##  cluster_2  cluster_5 cluster_16 cluster_10  cluster_7 cluster_18 cluster_11 
##        351        361        378        418        570        666        702 
##  cluster_6 cluster_13  cluster_3 cluster_14  cluster_4 cluster_20 cluster_12 
##        869        971       1090       1405       1484       1626       1767 
## cluster_15  cluster_8 cluster_19 cluster_17  cluster_9  cluster_1 
##       2148       3396       3612       3625       5418      31703
cellsToUse <- cells$Status%in%c("nonprogressor", "progressor")

testProp <- colTest(cells[, cellsToUse], 
                    condition = "Status", 
                    feature = "clusters")

testProp
##              W  pval adjPval    cluster
## cluster_2  190 0.024    0.48  cluster_2
## cluster_13 390 0.130    0.58 cluster_13
## cluster_6  230 0.150    0.58  cluster_6
## cluster_17 380 0.170    0.58 cluster_17
## cluster_14 380 0.180    0.58 cluster_14
## cluster_1  240 0.200    0.58  cluster_1
## cluster_15 370 0.240    0.58 cluster_15
## cluster_11 370 0.250    0.58 cluster_11
## cluster_8  240 0.260    0.58  cluster_8
## cluster_12 360 0.340    0.68 cluster_12
## cluster_9  360 0.380    0.69  cluster_9
## cluster_5  350 0.440    0.73  cluster_5
## cluster_16 280 0.590    0.83 cluster_16
## cluster_18 280 0.600    0.83 cluster_18
## cluster_10 280 0.620    0.83 cluster_10
## cluster_20 290 0.790    0.92 cluster_20
## cluster_19 320 0.810    0.92 cluster_19
## cluster_3  320 0.830    0.92  cluster_3
## cluster_7  300 0.910    0.96  cluster_7
## cluster_4  300 0.960    0.96  cluster_4
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]

prop <- getProp(cells, feature = "clusters")
clusterToUse <- rownames(testProp)[1]

boxplot( prop[imagesToUse, clusterToUse] ~ clinical[imagesToUse, "Status"] )

set.seed(51773)
cells <- scater::runUMAP(cells, 
                         subset_row = useMarkers, 
                         exprs_values = "norm")

someImages <- unique(colData(cells)$imageID)[c(1,10,20,40,50,60)]

# UMAP by imageID
plotReducedDim(cells[,colData(cells)$imageID %in% someImages], dimred="UMAP", colour_by="imageID")

# UMAP by cell type cluster
plotReducedDim(cells[,colData(cells)$imageID %in% someImages], dimred="UMAP", colour_by="clusters")

spicyR: test spatial relationships

spicyTest <- spicy(cells[, cellsToUse], 
                   condition = "Status", 
                   cellType = "clusters",
                   imageID = "imageID",
                   spatialCoords = c("m.cx", "m.cy"),
                   Rs = c(20, 50, 100),
                   sigma = 50,
                   BPPARAM = BPPARAM)

topPairs(spicyTest, n = 10)
##                         intercept coefficient     p.value adj.pvalue       from
## cluster_18__cluster_17   57.47863   103.82184 0.004269211  0.6412617 cluster_18
## cluster_17__cluster_18   56.23307    92.78098 0.006867987  0.6412617 cluster_17
## cluster_20__cluster_8   -70.27764    60.25065 0.009107886  0.6412617 cluster_20
## cluster_16__cluster_15 -141.08275    56.94516 0.009162453  0.6412617 cluster_16
## cluster_17__cluster_7    21.28862   107.32206 0.012743804  0.6412617 cluster_17
## cluster_8__cluster_20   -60.10460    60.04871 0.013838365  0.6412617  cluster_8
## cluster_15__cluster_16 -142.88187    48.39057 0.014461077  0.6412617 cluster_15
## cluster_6__cluster_6    192.73531   193.59265 0.015132520  0.6412617  cluster_6
## cluster_7__cluster_17    28.22911   107.75547 0.015205549  0.6412617  cluster_7
## cluster_9__cluster_5    -66.44860    75.52276 0.021968425  0.6412617  cluster_9
##                                to
## cluster_18__cluster_17 cluster_17
## cluster_17__cluster_18 cluster_18
## cluster_20__cluster_8   cluster_8
## cluster_16__cluster_15 cluster_15
## cluster_17__cluster_7   cluster_7
## cluster_8__cluster_20  cluster_20
## cluster_15__cluster_16 cluster_16
## cluster_6__cluster_6    cluster_6
## cluster_7__cluster_17  cluster_17
## cluster_9__cluster_5    cluster_5
signifPlot(spicyTest,
           breaks = c(-1.5, 3, 0.5))

lisaClust: Find cellular neighbourhoods

set.seed(51773)
cells <- lisaClust(cells, 
                   k = 7, 
                   Rs = c(20, 50, 100),
                   sigma = 50,
                   spatialCoords = c("m.cx", "m.cy"), 
                   cellType = "clusters", 
                   BPPARAM = BPPARAM)
  df <- colData(cells) |> 
  as.data.frame() |>
  filter(imageID == "Point2206_pt1116_31620")
  
  ggplot(df, aes(x = m.cx, y = m.cy, colour = region)) +
    geom_point()

  # ggplot(df, aes(x = m.cx, y = m.cy, colour = clusters, region = region)) +
  #   geom_point() +
  #   geom_hatching(window = "square", line.spacing = 41) +
  #   scale_region_manual(values = c(region_1 = 1,
  #                                  region_9 = 1,
  #                                  region_10 = 2,
  #                                  region_7 = 2,
  #                                  region_2 = 3,
  #                                  region_3 = 4,
  #                                  region_4 = 4,
  #                                  region_5 = 5,
  #                                  region_8 = 6
  #                                  ))
    


hatchingPlot(cells,
             useImages = "Point2206_pt1116_31620",
             cellType = "clusters",
             spatialCoords = c("m.cx", "m.cy"),
             window = "square",
             nbp = 300,
             line.spacing = 41
            )

testRegion <- colTest(cells[,cellsToUse], 
                      feature = "region",
                      condition = "Status")

testRegion
##            W   pval adjPval  cluster
## region_4 460 0.0067   0.047 region_4
## region_2 180 0.0190   0.066 region_2
## region_5 230 0.1400   0.330 region_5
## region_3 350 0.4900   0.860 region_3
## region_1 290 0.7400   0.910 region_1
## region_6 290 0.7800   0.910 region_6
## region_7 300 0.9600   0.960 region_7
regionMap(cells, cellType = "clusters", limit = c(0.2, 5))

scFeatures: Test some different features

data <- scFeatures(cells, 
                   feature_types = c("proportion_raw", "gene_mean_celltype"),
                   sample = "imageID",
                   celltype = "clusters",
                   assay = "norm",
                   ncores = nCores )
## [1] "generating proportion raw features"
## [1] "generating gene mean celltype features"
names(data) <- c("prop", "mean")
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]

test <- colTest(data$mean[imagesToUse,],
                condition = clinical[imagesToUse, "Status"])

test |> head()
##                    W   pval adjPval          cluster
## cluster_19--PDL1 210 0.0016    0.69 cluster_19--PDL1
## cluster_3--PD1   200 0.0043    0.69   cluster_3--PD1
## cluster_6--PDL1  170 0.0053    0.69  cluster_6--PDL1
## cluster_5--P     460 0.0055    0.69     cluster_5--P
## cluster_19--P    460 0.0072    0.69    cluster_19--P
## cluster_17--P    460 0.0073    0.69    cluster_17--P

ClassifyR: Classification

data[["regions"]] <- getProp(cells, "region")

data <- lapply(data, as.data.frame)

#Subset data
measurements <- lapply(data, function(x)x[imagesToUse, ])

set.seed(51773)
cv <- crossValidate(measurements = measurements, 
                    outcome = clinical[imagesToUse, "Status"],
                    nFeatures = 5,
                    classifier = "elasticNetGLM",
                    nFolds = 5,
                    nRepeats = 100,
                    nCores = nCores
                    )

ROCplot(cv, comparison = "Assay Name")

performancePlot(cv,
                performanceName = "AUC",
                characteristicsList = list(x = "Assay Name"))